Dataset downloaded from mgandal’s github repository.
# Load csvs
datExpr = read.csv('./../Data/Gandal/RNAseq_ASD_datExpr.csv', row.names=1)
datMeta = read.csv('./../Data/Gandal/RNAseq_ASD_datMeta.csv')
# Group brain regions by lobes
datMeta$Brain_Region = as.factor(datMeta$Region)
datMeta$Brain_lobe = 'Occipital'
datMeta$Brain_lobe[datMeta$Brain_Region %in% c('BA4_6', 'BA9', 'BA24', 'BA44_45')] = 'Frontal'
datMeta$Brain_lobe[datMeta$Brain_Region %in% c('BA3_1_2_5', 'BA7')] = 'Parietal'
datMeta$Brain_lobe[datMeta$Brain_Region %in% c('BA38', 'BA39_40', 'BA20_37', 'BA41_42_22')] = 'Temporal'
datMeta$Brain_lobe=factor(datMeta$Brain_lobe, levels=c('Frontal', 'Temporal', 'Parietal', 'Occipital'))
# Remove '/' from Batch variable: (It is recommended (but not required) to use only letters, numbers,
# and delimiters '_' or '.', in levels of factors as these are safe characters for column names in R
datMeta$Batch = gsub('/', '.', datMeta$RNAExtractionBatch) %>% as.factor
# Transform Diagnosis into a factor variable
datMeta$Diagnosis_ = factor(datMeta$Diagnosis_, levels=c('CTL','ASD'))
Filter Frontal lobe samples
datExpr = datExpr %>% dplyr::select(paste0('X',datMeta$Dissected_Sample_ID[datMeta$Brain_lobe=='Frontal']))
datMeta = datMeta %>% filter(Brain_lobe=='Frontal')
Data description taken from the dataset’s synapse entry: RNAseq data was generated from 88 postmortem cortex brain samples from subjects with ASD (53 samples from 24 subjects) and non-psychiatric controls (35 samples from 17 subjects), across four cortical regions encompassing all major cortical lobes – frontal, temporal, parietal, and occipital. Brain samples were obtained from the Harvard Brain Bank as part of the Autism Tissue Project (ATP).
print(paste0('Dataset includes ', nrow(datExpr), ' genes from ', ncol(datExpr), ' samples belonging to ', length(unique(datMeta$Subject_ID)), ' different subjects.'))
## [1] "Dataset includes 63682 genes from 22 samples belonging to 22 different subjects."
Diagnosis distribution: There are more ASD samples than controls
table(datMeta$Diagnosis_)
##
## CTL ASD
## 13 9
Sex distribution: There are only 3 Female samples
table(datMeta$Sex)
##
## F M
## 3 19
Age distribution: Subjects between 2 and 56 years old with a mean close to 30
summary(datMeta$Age)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 2.00 19.25 25.50 27.55 36.50 56.00
1. Filter genes with start or end position missing
to_keep = !is.na(datGenes$length)
datGenes = datGenes[to_keep,]
datExpr = datExpr[to_keep,]
rownames(datGenes) = datGenes$ensembl_gene_id
print(paste0('Removed ', sum(!to_keep), ' genes, ', sum(to_keep), ' remaining'))
## [1] "Removed 5 genes, 63677 remaining"
2. Filter genes with low expression levels
\(\qquad\) 2.1 Remove genes with zero expression in all of the samples
to_keep = rowSums(datExpr)>0
datGenes = datGenes[to_keep,]
datExpr = datExpr[to_keep,]
print(paste0('Removed ', sum(!to_keep), ' genes, ', sum(to_keep), ' remaining'))
## [1] "Removed 19311 genes, 44366 remaining"
\(\qquad\) 2.2 Removing genes with a mean expression lower than a given threhsold
There is a weird behaviour below 0.12, so I had chosen this to be the original threshold but it turned out to be too low for sva to work properly, so increased the threhsold to 0.6, where the next change in density happens and it still didn’t work, so ended up chosing as threshold the lowest values for which the sva function worked (2)
The next big valley is formed around 50, but that would leave only 14K genes
threshold = 2
plot_data = data.frame('id'=rownames(datExpr), 'mean_expression' = rowMeans(datExpr))
ggplotly(plot_data %>% ggplot(aes(x=mean_expression)) + geom_density(color='#0099cc', fill='#0099cc', alpha=0.3) +
geom_vline(xintercept=threshold, color='gray') + scale_x_log10() +
ggtitle('gene Mean Expression distribution') + theme_minimal())
to_keep = rowMeans(datExpr)>threshold
datGenes = datGenes[to_keep,]
datExpr = datExpr[to_keep,]
print(paste0('Removed ', sum(!to_keep), ' genes, ', sum(to_keep), ' remaining'))
## [1] "Removed 17801 genes, 26565 remaining"
3. Filter outlier samples
\(\qquad\) 3.1 Gandal filters samples belonging to subject AN03345 without giving an explanation. Since it could have some technical problems, I remove them as well
to_keep = (datMeta$Subject_ID != 'AN03345')
datMeta = datMeta[to_keep,]
datExpr = datExpr[,to_keep]
print(paste0('Removed ', sum(!to_keep), ' sample, ', sum(to_keep), ' remaining'))
## [1] "Removed 1 sample, 21 remaining"
\(\qquad\) 3.2 Filter out outliers: Using node connectivity as a distance measure, normalising it and filtering out genes farther away than 2 standard deviations from the left (lower connectivity than average, not higher)
Gandal uses the formula \(s_{ij}=\frac{1+bw(i,j)}{2}\) to convert all the weights to positive values, but I used \(s_{ij}=|bw(i,j)|\) instead because I think it makes more sense. In the end it doesn’t matter because they select as outliers the same samples
The outlier is a 27 year old Male with autism who happens to have the lowest PMI of the dataset (8.3)
absadj = datExpr %>% bicor %>% abs
netsummary = fundamentalNetworkConcepts(absadj)
ku = netsummary$Connectivity
z.ku = (ku-mean(ku))/sqrt(var(ku))
plot_data = data.frame('sample'=1:length(z.ku), 'distance'=z.ku, 'Sample_ID'=datMeta$Sample_ID,
'Subject_ID'=datMeta$Subject_ID, 'Extraction_Batch'=datMeta$RNAExtractionBatch,
'Brain_Lobe'=datMeta$Brain_lobe, 'Sex'=datMeta$Sex, 'Age'=datMeta$Age,
'Diagnosis'=datMeta$Diagnosis_, 'PMI'=datMeta$PMI)
selectable_scatter_plot(plot_data, plot_data[,-c(1,2)])
print(paste0('Outlier samples: ', paste(as.character(plot_data$Sample_ID[plot_data$distance< -2]), collapse=', ')))
## [1] "Outlier samples: AN00493_BA4_6"
to_keep = z.ku > -2
datMeta = datMeta[to_keep,]
datExpr = datExpr[,to_keep]
print(paste0('Removed ', sum(!to_keep), ' sample(s), ', sum(to_keep), ' remaining'))
## [1] "Removed 1 sample(s), 20 remaining"
rm(absadj, netsummary, ku, z.ku, plot_data, to_keep)
print(paste0('After filtering, the dataset consists of ', nrow(datExpr), ' genes and ', ncol(datExpr), ' samples'))
## [1] "After filtering, the dataset consists of 26565 genes and 20 samples"
According to Tackling the widespread and critical impact of batch effects in high-throughput data, technical artifacts can be an important source of variability in the data, so batch correction should be part of the standard preprocessing pipeline of gene expression data.
They say Processing group and Date of the experiment are good batch surrogates, so I’m going to see if they affect the data in any clear way to use them as surrogates.
All the information we have is the Brain Bank, and although all the samples were obtained from the Autism Tissue Project, we don’t have any more specific information about who preprocessed each sample
table(datMeta$Brain_Bank)
##
## ATP
## 20
There are two different dates when the data was procesed. Most samples belong to the October batch
table(datMeta$RNAExtractionBatch)
##
## 10/10/2014 6/20/2014
## 14 6
All but one of the autism samples come from the October batch. This could be a problem when performing supervised batch correction (comBat) because by removing batch effect we could be removing also the biological signal related to diagnosis.
table(datMeta$RNAExtractionBatch, datMeta$Diagnosis_)
##
## CTL ASD
## 10/10/2014 8 6
## 6/20/2014 5 1
Samples don’t seem to cluster together that strongly for each batch, although there does seem to be some kind of relation
There doesn’t seem to be a strong relation with the other variables, including diagnosis
h_clusts = datExpr %>% t %>% dist %>% hclust %>% as.dendrogram
create_viridis_dict = function(){
min_age = datMeta$Age %>% min
max_age = datMeta$Age %>% max
viridis_age_cols = viridis(max_age - min_age + 1)
names(viridis_age_cols) = seq(min_age, max_age)
return(viridis_age_cols)
}
viridis_age_cols = create_viridis_dict()
dend_meta = datMeta[match(substring(labels(h_clusts),2), datMeta$Dissected_Sample_ID),] %>%
mutate('Batch' = ifelse(RNAExtractionBatch=='10/10/2014', '#F8766D', '#00BFC4'),
'Diagnosis' = ifelse(Diagnosis_=='CTL','#008080','#86b300'), # Blue control, Green ASD
'Sex' = ifelse(Sex=='F','#ff6666','#008ae6'), # Pink Female, Blue Male
'Age' = viridis_age_cols[as.character(Age)]) %>% # Purple: young, Yellow: old
dplyr::select(Age, Sex, Diagnosis, Batch)
h_clusts %>% set('labels', rep('', nrow(datMeta))) %>% set('branches_k_color', k=9) %>% plot
colored_bars(colors=dend_meta)
rm(h_clusts, dend_meta, create_viridis_dict, viridis_age_cols)
There seems to be a different behaviour by batch mainly in the second and third principal components, although this behaviour could be related to diagnosis
pca = datExpr %>% t %>% prcomp
summary(pca)$importance[,1:3]
## PC1 PC2 PC3
## Standard deviation 202055.45733 91840.41339 68156.44883
## Proportion of Variance 0.64481 0.13322 0.07337
## Cumulative Proportion 0.64481 0.77803 0.85139
plot_data = data.frame('ID'=colnames(datExpr), 'PC1' = pca$x[,1], 'PC2' = pca$x[,2], 'PC3' = pca$x[,3]) %>%
mutate('ID'=substring(ID,2)) %>% left_join(datMeta, by=c('ID'='Dissected_Sample_ID')) %>%
mutate('Batch'=RNAExtractionBatch) %>% dplyr::select('PC1','PC2','PC3','Batch')
plot_data %>% ggpairs(progress=FALSE, aes(colour=Batch, fill=Batch, alpha=0.3)) + theme_minimal()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
rm(pca, plot_data)
Comparing the mean expression of each sample by batch we can see the batch effect is reflected here as well, although, again, this could be related to diagnosis
plot_data_b1 = data.frame('Mean'=colMeans(datExpr[,datMeta$RNAExtractionBatch=='10/10/2014']), 'Batch'='10/10/2014')
plot_data_b2 = data.frame('Mean'=colMeans(datExpr[,datMeta$RNAExtractionBatch=='6/20/2014']), 'Batch'='6/20/2014')
plot_data = rbind(plot_data_b1, plot_data_b2)
mu = plot_data %>% group_by(Batch) %>% dplyr::summarise(BatchMean=mean(Mean))
ggplotly(plot_data %>% ggplot(aes(x=Mean, color=Batch, fill=Batch)) + geom_density(alpha=0.3) +
geom_vline(data=mu, aes(xintercept=BatchMean, color=Batch), linetype='dashed') +
ggtitle('Mean expression by sample grouped by Batch') + scale_x_log10() + theme_minimal())
rm(plot_data_b1, plot_data_b2, plot_data, mu)
Following the pipeline from Surrogate variable analysis: hidden batch effects where sva is used with DESeq2.
Create a DeseqDataSet object, estimate the library size correction and save the normalized counts matrix
counts = datExpr %>% as.matrix
rowRanges = GRanges(datGenes$chromosome_name,
IRanges(datGenes$start_position, width=datGenes$length),
strand=datGenes$strand,
feature_id=datGenes$ensembl_gene_id)
se = SummarizedExperiment(assays=SimpleList(counts=counts), rowRanges=rowRanges, colData=datMeta)
dds = DESeqDataSet(se, design = ~ Diagnosis_)
dds = estimateSizeFactors(dds)
norm.cts = counts(dds, normalized=TRUE)
Provide the normalized counts and two model matrices to SVA. The first matrix uses the biological condition, and the second model matrix is the null model.
mod = model.matrix(~ Diagnosis_, colData(dds))
mod0 = model.matrix(~ 1, colData(dds))
sva_fit = svaseq(norm.cts, mod=mod, mod0=mod0)
## Number of significant surrogate variables is: 4
## Iteration (out of 5 ):1 2 3 4 5
rm(mod, mod0)
Found 4 surrogate variables, since there is no direct way to select which ones to pick Bioconductor answer, kept all of them.
Include SV estimations to datMeta information
sv_data = sva_fit$sv %>% data.frame
colnames(sv_data) = paste0('SV',1:ncol(sv_data))
datMeta_sva = cbind(datMeta, sv_data)
rm(sv_data)
In conclusion: Date of extraction could work as a surrogate for batch effect but we cannot use it because it is correlated to the diagnosis. The sva package found other 4 variables that could work as surrogates which are now included in datMeta and should be included in the DEA.
Using DESeq2 package to perform normalisation. Chose this package over limma because limma uses the log transformed data as input instead of the raw counts and I have discovered that in this dataset, this transformation affects genes differently depending on their mean expression level, and genes with a high SFARI score are specially affected by this.
plot_data = data.frame('ID'=rownames(datExpr), 'Mean'=rowMeans(datExpr), 'SD'=apply(datExpr,1,sd))
plot_data %>% ggplot(aes(Mean, SD)) + geom_point(color='#0099cc', alpha=0.1) + geom_abline(color='gray') +
scale_x_log10() + scale_y_log10() + theme_minimal()
rm(plot_data)
Using vst instead of rlog to perform normalisation. Bioconductor question explaining differences between methods. Chose vst because a) it is much faster than rlog (it is recommended to use vst for samples larger than 50), and b) Michael Love (author of DESEq2) recommends using it over rlog
Including a log fold change threshold of 0 in the results formula \(H_0:lfc=0\) because setting any other log fold change seems arbitrary and we risk losing genes with a significant differential expression for genes with a higher difference, but not necessarily as significant.
counts = datExpr %>% as.matrix
rowRanges = GRanges(datGenes$chromosome_name,
IRanges(datGenes$start_position, width=datGenes$length),
strand=datGenes$strand,
feature_id=datGenes$ensembl_gene_id)
se = SummarizedExperiment(assays=SimpleList(counts=counts), rowRanges=rowRanges, colData=datMeta_sva)
dds = DESeqDataSet(se, design = ~ Batch + SV1 + SV2 + SV3 + SV4 + Diagnosis_)
# Perform DEA
dds = DESeq(dds)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
DE_info = results(dds, lfcThreshold=0, altHypothesis='greaterAbs')
# Perform vst
vsd = vst(dds)
datExpr_vst = assay(vsd)
datMeta_vst = colData(vsd)
datGenes_vst = rowRanges(vsd)
rm(counts, rowRanges, se, vsd)
Using the plotting function DESEq2’s manual proposes to study vst’s output it looks like the data could be homoscedastic
meanSdPlot(datExpr_vst, plot=FALSE)$gg + theme_minimal()
When plotting point by point we see the homoscedasticity is not as robust as it seemed in the first plot, but is good enough
plot_data = data.frame('ID'=rownames(datExpr_vst), 'Mean'=rowMeans(datExpr_vst), 'SD'=apply(datExpr_vst,1,sd))
plot_data %>% ggplot(aes(Mean, SD)) + geom_point(color='#0099cc', alpha=0.05) +
scale_x_log10() + scale_y_log10() + theme_minimal()
rm(plot_data)
save(datExpr, datMeta, datGenes, file='./../Data/Gandal/filtered_raw_data_frontal.RData')
#load('./../Data/Gandal/filtered_raw_data_frontal.RData')
datExpr = datExpr_vst
datMeta = datMeta_vst %>% data.frame
datGenes = datGenes_vst
print(paste0('After filtering, the dataset consists of ', nrow(datExpr), ' genes and ', ncol(datExpr), ' samples'))
## [1] "After filtering, the dataset consists of 26565 genes and 20 samples"
rm(datExpr_vst, datMeta_vst, datGenes_vst)
By including the surrogate variables in the DESeq formula we only modelled the batch effects into the DEA, but we didn’t actually correct them from the data, for that we need to use ComBat (or other equivalent package) in the already normalised data
In some places they say you shouldn’t correct these effects on the data because you risk losing biological variation, in others they say you should because they introduce noise to the data. The only thing everyone agrees on is that you shouldn’t remove them before performing DEA but instead include them in the model.
Based on the conclusions from Practical impacts of genomic data “cleaning” on biological discovery using surrogate variable analysis it seems like it may be a good idea to remove the batch effects from the data and not only from the DE analysis:
Using SVA, ComBat or related tools can increase the power to identify specific signals in complex genomic datasets (they found “greatly sharpened global and gene-specific differential expression across treatment groups”)
But caution should be exercised to avoid removing biological signal of interest
We must be precise and deliberate in the design and analysis of experiments and the resulting data, and also mindful of the limitations we impose with our own perspective
Open data exploration is not possible after such supervised “cleaning”, because effects beyond those stipulated by the researcher may have been removed
# Taken from https://www.biostars.org/p/121489/#121500
correctDatExpr = function(datExpr, mod, svs) {
X = cbind(mod, svs)
Hat = solve(t(X) %*% X) %*% t(X)
beta = (Hat %*% t(datExpr))
rm(Hat)
gc()
P = ncol(mod)
return(datExpr - t(as.matrix(X[,-c(1:P)]) %*% beta[-c(1:P),]))
}
pca_samples_before = datExpr %>% t %>% prcomp
pca_genes_before = datExpr %>% prcomp
# Correct
mod = model.matrix(~ Diagnosis_, colData(dds))
svs = datMeta %>% dplyr::select(SV1:SV4) %>% as.matrix
datExpr_corrected = correctDatExpr(as.matrix(datExpr), mod, svs)
pca_samples_after = datExpr_corrected %>% t %>% prcomp
pca_genes_after = datExpr_corrected %>% prcomp
Now I understand why they say you lose the biological signal
pca_samples_df = rbind(data.frame('ID'=colnames(datExpr), 'PC1'=pca_samples_before$x[,1],
'PC2'=pca_samples_before$x[,2], 'corrected'=0),
data.frame('ID'=colnames(datExpr), 'PC1'=pca_samples_after$x[,1],
'PC2'=pca_samples_after$x[,2], 'corrected'=1)) %>%
left_join(datMeta %>% mutate('ID'=rownames(datMeta)), by='ID')
ggplotly(pca_samples_df %>% ggplot(aes(PC1, PC2, color=Diagnosis_)) + geom_point(aes(frame=corrected, id=ID)) +
xlab(paste0('PC1 (corr=', round(cor(pca_samples_before$x[,1],pca_samples_after$x[,1]),2),
'). % Var explained: ', round(100*summary(pca_samples_before)$importance[2,1],1),' to ',
round(100*summary(pca_samples_after)$importance[2,1],1))) +
ylab(paste0('PC2 (corr=', round(cor(pca_samples_before$x[,2],pca_samples_after$x[,2]),2),
'). % Var explained: ', round(100*summary(pca_samples_before)$importance[2,2],1),' to ',
round(100*summary(pca_samples_after)$importance[2,2],1))) +
ggtitle('Samples') + theme_minimal())
rm(pca_samples_df)
It seems like the sva correction preserves the mean expression of the genes and erases almost everything else (although what little else remains is enough to perfectly characterise the two Diagnosis groups)
*Plot is done with only 10% of the genes because it was too heavy otherwise
pca_genes_df = rbind(data.frame('ID'=rownames(datExpr), 'PC1'=pca_genes_before$x[,1],
'PC2'=pca_genes_before$x[,2], 'corrected'=0, 'MeanExpr'=rowMeans(datExpr)),
data.frame('ID'=rownames(datExpr), 'PC1'=pca_genes_after$x[,1],
'PC2'=pca_genes_after$x[,2], 'corrected'=1, 'MeanExpr'=rowMeans(datExpr)))
keep_genes = rownames(datExpr) %>% sample(0.1*nrow(datExpr))
pca_genes_df = pca_genes_df %>% filter(ID %in% keep_genes)
ggplotly(pca_genes_df %>% ggplot(aes(PC1, PC2,color=MeanExpr)) + geom_point(alpha=0.3, aes(frame=corrected, id=ID)) +
xlab(paste0('PC1 (corr=', round(cor(pca_genes_before$x[,1],pca_genes_after$x[,1]),2),
'). % Var explained: ', round(100*summary(pca_genes_before)$importance[2,1],1),' to ',
round(100*summary(pca_genes_after)$importance[2,1],1))) +
ylab(paste0('PC2 (corr=', round(cor(pca_genes_before$x[,2],pca_genes_after$x[,2]),2),
'). % Var explained: ', round(100*summary(pca_genes_before)$importance[2,2],1),' to ',
round(100*summary(pca_genes_after)$importance[2,2],1))) +
scale_color_viridis() + ggtitle('Genes') + theme_minimal())
rm(pca_samples_before, pca_genes_before, mod, svs, pca_samples_after, pca_genes_after, pca_samples_df, keep_genes)
Decided to keep the corrected expression dataset
datExpr = datExpr_corrected
There seems to still be a difference in the behaviour in the second and third PC to the processing date, although not as big as before
pca = datExpr %>% t %>% prcomp
summary(pca)$importance[,1:3]
## PC1 PC2 PC3
## Standard deviation 13.89120 13.22384 11.55192
## Proportion of Variance 0.13021 0.11800 0.09005
## Cumulative Proportion 0.13021 0.24821 0.33826
plot_data = data.frame('ID'=colnames(datExpr), 'PC1' = pca$x[,1], 'PC2' = pca$x[,2], 'PC3' = pca$x[,3]) %>%
mutate('ID'=substring(ID,2)) %>% left_join(datMeta, by=c('ID'='Dissected_Sample_ID')) %>%
mutate('Batch'=RNAExtractionBatch) %>% dplyr::select('PC1','PC2','PC3','Batch')
plot_data %>% ggpairs(progress=FALSE, aes(colour=Batch, fill=Batch, alpha=0.3)) + theme_minimal()
rm(pca, plot_data)
Even after correcting the dataset for the surrogate variables found with sva, there is still a difference in mean expression by processing date
plot_data_b1 = data.frame('Mean'=colMeans(datExpr[,datMeta$RNAExtractionBatch=='10/10/2014']), 'Batch'='10/10/2014')
plot_data_b2 = data.frame('Mean'=colMeans(datExpr[,datMeta$RNAExtractionBatch=='6/20/2014']), 'Batch'='6/20/2014')
plot_data = rbind(plot_data_b1, plot_data_b2)
mu = plot_data %>% group_by(Batch) %>% dplyr::summarise(BatchMean=mean(Mean))
ggplotly(plot_data %>% ggplot(aes(x=Mean, color=Batch, fill=Batch)) + geom_density(alpha=0.3) +
geom_vline(data=mu, aes(xintercept=BatchMean, color=Batch), linetype='dashed') +
ggtitle('Mean expression by sample grouped by processing date') + scale_x_log10() + theme_minimal())
rm(plot_data_b1, plot_data_b2, plot_data, mu)
Since there is a correlation between diagnosis and processing date, I won’t correct for this batch effect because I may lose important biological signals related to autism
save(datExpr, datMeta, datGenes, DE_info, dds, file='./../Data/Gandal/preprocessed_data_frontal.RData')
sessionInfo()
## R version 3.5.2 (2018-12-20)
## Platform: x86_64-redhat-linux-gnu (64-bit)
## Running under: Scientific Linux 7.6 (Nitrogen)
##
## Matrix products: default
## BLAS/LAPACK: /usr/lib64/R/lib/libRblas.so
##
## locale:
## [1] LC_CTYPE=en_GB.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_GB.UTF-8 LC_COLLATE=en_GB.UTF-8
## [5] LC_MONETARY=en_GB.UTF-8 LC_MESSAGES=en_GB.UTF-8
## [7] LC_PAPER=en_GB.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_GB.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] parallel stats4 stats graphics grDevices utils datasets
## [8] methods base
##
## other attached packages:
## [1] hexbin_1.27.2 dendextend_1.10.0
## [3] vsn_3.50.0 WGCNA_1.66
## [5] fastcluster_1.1.25 dynamicTreeCut_1.63-1
## [7] sva_3.30.1 genefilter_1.64.0
## [9] mgcv_1.8-26 nlme_3.1-137
## [11] DESeq2_1.22.2 SummarizedExperiment_1.12.0
## [13] DelayedArray_0.8.0 BiocParallel_1.16.6
## [15] matrixStats_0.54.0 Biobase_2.42.0
## [17] GenomicRanges_1.34.0 GenomeInfoDb_1.18.2
## [19] IRanges_2.16.0 S4Vectors_0.20.1
## [21] BiocGenerics_0.28.0 biomaRt_2.38.0
## [23] ggExtra_0.8 GGally_1.4.0
## [25] gridExtra_2.3 viridis_0.5.1
## [27] viridisLite_0.3.0 RColorBrewer_1.1-2
## [29] plotlyutils_0.0.0.9000 plotly_4.8.0
## [31] glue_1.3.1 reshape2_1.4.3
## [33] forcats_0.3.0 stringr_1.4.0
## [35] dplyr_0.8.0.1 purrr_0.3.1
## [37] readr_1.3.1 tidyr_0.8.3
## [39] tibble_2.1.1 ggplot2_3.1.0
## [41] tidyverse_1.2.1
##
## loaded via a namespace (and not attached):
## [1] readxl_1.1.0 backports_1.1.2 Hmisc_4.1-1
## [4] plyr_1.8.4 lazyeval_0.2.2 splines_3.5.2
## [7] crosstalk_1.0.0 robust_0.4-18 digest_0.6.18
## [10] foreach_1.4.4 htmltools_0.3.6 GO.db_3.7.0
## [13] magrittr_1.5 checkmate_1.8.5 memoise_1.1.0
## [16] fit.models_0.5-14 cluster_2.0.7-1 doParallel_1.0.14
## [19] limma_3.38.3 annotate_1.60.1 modelr_0.1.4
## [22] prettyunits_1.0.2 colorspace_1.4-1 blob_1.1.1
## [25] rvest_0.3.2 rrcov_1.4-3 haven_1.1.1
## [28] xfun_0.5 crayon_1.3.4 RCurl_1.95-4.10
## [31] jsonlite_1.6 impute_1.56.0 survival_2.43-3
## [34] iterators_1.0.9 gtable_0.2.0 zlibbioc_1.28.0
## [37] XVector_0.22.0 kernlab_0.9-27 prabclus_2.2-7
## [40] DEoptimR_1.0-8 scales_1.0.0 mvtnorm_1.0-7
## [43] DBI_1.0.0 miniUI_0.1.1.1 Rcpp_1.0.1
## [46] xtable_1.8-3 progress_1.2.0 htmlTable_1.11.2
## [49] mclust_5.4 foreign_0.8-71 bit_1.1-14
## [52] preprocessCore_1.44.0 Formula_1.2-3 htmlwidgets_1.3
## [55] httr_1.4.0 fpc_2.1-11.1 modeltools_0.2-22
## [58] acepack_1.4.1 flexmix_2.3-15 pkgconfig_2.0.2
## [61] reshape_0.8.7 XML_3.98-1.11 nnet_7.3-12
## [64] locfit_1.5-9.1 labeling_0.3 tidyselect_0.2.5
## [67] rlang_0.3.2 later_0.8.0 AnnotationDbi_1.44.0
## [70] munsell_0.5.0 cellranger_1.1.0 tools_3.5.2
## [73] cli_1.1.0 generics_0.0.2 RSQLite_2.1.1
## [76] broom_0.5.1 evaluate_0.13 yaml_2.2.0
## [79] knitr_1.22 bit64_0.9-7 robustbase_0.93-0
## [82] whisker_0.3-2 mime_0.6 xml2_1.2.0
## [85] compiler_3.5.2 rstudioapi_0.7 curl_3.3
## [88] affyio_1.52.0 geneplotter_1.60.0 pcaPP_1.9-73
## [91] stringi_1.4.3 trimcluster_0.1-2.1 lattice_0.20-38
## [94] Matrix_1.2-15 pillar_1.3.1 BiocManager_1.30.4
## [97] data.table_1.12.0 bitops_1.0-6 httpuv_1.5.0
## [100] affy_1.60.0 R6_2.4.0 latticeExtra_0.6-28
## [103] promises_1.0.1 codetools_0.2-15 MASS_7.3-51.1
## [106] assertthat_0.2.1 withr_2.1.2 GenomeInfoDbData_1.2.0
## [109] diptest_0.75-7 hms_0.4.2 grid_3.5.2
## [112] rpart_4.1-13 class_7.3-14 rmarkdown_1.12
## [115] Cairo_1.5-9 shiny_1.2.0 lubridate_1.7.4
## [118] base64enc_0.1-3